# ------------------------------ 
# ' multilevel suicide paper replication package files
# ' you should execute the following functions step by step to replicate all figures/tables
# ------------------------------

# specify your paths for data and code
root_path = '/N/project/suicide_study'

data_path = file.path(root_path,'pnas_replication','data')
code_path = file.path(root_path,'pnas_replication','coce')

rawdata_path = file.path(data_path,'rawdata')
processed_path = file.path(data_path, 'processed')

log_path = file.path(root_path,'pnas_replication','results','log')
figure_path = file.path(root_path,'pnas_replication','results','figure')

# load library
base_package = c('rio','data.table','fst','skimr','readr','stringr','refinr','haven','mice','parallel','readxl',
	'tidyverse','hrbrthemes','extrafont','viridis','ggpubr','ggsci')
invisible(lapply(base_package, library, character.only=TRUE))

# 1. clean NVDRS file
source(file.path(code_path,'1_clean_NVDRS.R'))

# 2. clean ACS file 
source(file.path(code_path,'2_clean_ACS_PUMS.R'))

# 3. clean RCMS files
source(file.path(code_path,'3_clean_RCMS.R'))

# 4. create weight to match the geographic unit between the ACS PUMS data (puma) and NVDRS (county)
source(file.path(code_path,'4_create_county_weight_for_ACS.R'))

# 5. merge NVDRS + ACS + RCMS + other county measures
source(file.path(code_path,'5_merge_NVDRS_ACS_RCMS_rounty.R'))

# 6. impute missing values using MICE package in R
source(file.path(code_path,'6_impute_reg_table.R'))

# 7. create regression table based on the imputed measure
source(file.path(code_path,'7_create_reg_table_main.R'))
source(file.path(code_path,'7_create_reg_table_imputed.R'))

# 8. run regression models using the raw data, and imputed samples 
# since it takes long time to run this model, we have to use bash script to split fitting the model
source(file.path(code_path,'8_model_execute_command.R'))
source(file.path(code_path,'8_stata_run_appendix_cb.sh'))
source(file.path(code_path,'8_stata_run_bash_cb.sh'))

# the above files will run the following two stata do files
source(file.path(code_path,'8_regression_model.do')) 
source(file.path(code_path,'8_table_regression_all_table.do')) # this code will generate all Tables in the paper

# then, based on marginal input files, we generate the margin plots 
# first run the following do-file to transform smcl file to txt file
system(paste0('stata-mp do ',file.path(code_path,'9_transform_From_smcl_to_txt.do')))

# to generate Figure 1, 2, 3, and S2
source(file.path(code_path,'9_figure_pnas_all.R')) 

# for Figure S1 : to examine the categorical distribution across counties
source(file.path(code_path,'9_figure_category_distribution.R'))
